library(tidyverse)
library(cowplot)
library(GGally)
library(heatmaply)
library(sva)
library(limma)
library(biobroom)
library(ggridges)
library(ggthemes)
theme_set(theme_minimal())
# Cells #
vf.cell.neg.raw <- read_csv("./data/abundances/p5fa_vpa_exp_hilic_target_cells_negmode_abundances.csv")
vf.cell.pos.raw <- read_csv("./data/abundances/p5fa_vpa_exp_hilic_target_cells_posmode_abundances.csv")
# Media #
vf.med.neg.raw <- read_csv("./data/abundances/p5fa_vpa_exp_hilic_target_media_negmode_abundances.csv")
vf.med.pos.raw <- read_csv("./data/abundances/p5fa_vpa_exp_hilic_target_media_posmode_abundances.csv")
# Cells #
vf.cell.neg.compound.info <- read_csv("./data/compound_info/p5fa_vpa_exp_hilic_target_cells_negmode_cmpnd_info.csv")
vf.cell.pos.compound.info <- read_csv("./data/compound_info/p5fa_vpa_exp_hilic_target_cells_posmode_cmpnd_info.csv")
# Media #
vf.med.neg.compound.info <- read_csv("./data/compound_info/p5fa_vpa_exp_hilic_target_media_negmode_cmpnd_info.csv")
vf.med.pos.compound.info <- read_csv("./data/compound_info/p5fa_vpa_exp_hilic_target_media_posmode_cmpnd_info.csv")
# Kegg/other ID reference #
#cmpnd.id.db <- read_csv("./data/anp_db_compound_kegg.csv")
MissingPerSamplePlot <- function(raw.data, start.string) {
# Counts the number of missing/NA values per sample and
# percent compounds missing out of total number of compounds per sample
# Then passes the result into a vertical bar plot, where each
# bar represents a single sample and the size of the bar
# is the % of compounds missing
counted.na <- raw.data %>%
select(starts_with(start.string)) %>%
mutate(
count.na = apply(., 1, function(x) sum(is.na(x))),
percent.na = (count.na / ncol(raw.data %>% select(starts_with(start.string)))) * 100
) %>%
dplyr::select(count.na, percent.na) %>%
bind_cols(
raw.data %>%
select(Samples, Group)
) %>%
arrange(percent.na) %>%
mutate(f.order = factor(Samples, levels = Samples))
counted.na %>%
ggplot(aes(x = f.order, y = percent.na, fill = Group)) +
geom_bar(stat = "identity") +
geom_hline(yintercept = 20, color = "gray", size = 1, alpha = 0.8) +
coord_flip()+
xlab("Samples") +
ylab("Percent missing values in sample") +
theme(axis.text.y = element_text(size = 6))
}
MissingPerCompound <- function(raw.data, start.string){
# Function to count in how many experimental samples each compound is missing
# Also calculates the % missing:
# (# NA per compound across all experimental samples) * 100 / (tot num of samples)
raw.data %>%
filter(Group == "sample") %>%
select(Samples, starts_with(start.string)) %>%
gather(key = "Compound", value = "Abundance", -Samples) %>%
group_by(Compound) %>%
summarise(
na_count = sum(is.na(Abundance)),
n_samples = n(),
percent_na = (na_count * 100 / n_samples)
) %>%
filter(na_count > 0) %>%
arrange(desc(percent_na))
}
ReplaceNAwMinLogTransformMult <- function(raw.dataframe, start.prefix) {
# Function to replace any NAs in each column with the minimum for that column,
# separately for each sample type.
# NA in negative control samples are replaced by 2.
# Then data is log2 transformed
# experimental samples #
smpls <- raw.dataframe %>%
filter(Group == "sample") %>%
dplyr::select(starts_with(start.prefix))
smpls.min <- lapply(smpls, min, na.rm = TRUE)
smpls.noNA <- raw.dataframe %>%
filter(Group == "sample") %>%
dplyr::select(Samples:Plate) %>%
bind_cols(
smpls %>%
replace_na(replace = smpls.min) %>%
log2()
)
# QC samples #
QC <- raw.dataframe %>%
filter(Group == "mix") %>%
dplyr::select(starts_with(start.prefix))
QC.min <- lapply(QC, min, na.rm = TRUE)
# replace the missing values in the QC with the minimum of the QC
# then take the log
QC.noNA <- raw.dataframe %>%
filter(Group == "mix") %>%
dplyr::select(Samples:Plate) %>%
bind_cols(
QC %>%
replace_na(replace = QC.min) %>%
log2()
)
# not samples or QC #
other.min <- setNames(
as.list(
rep(2, ncol(
raw.dataframe %>%
dplyr::select(starts_with(start.prefix))))
),
colnames(raw.dataframe %>% dplyr::select(starts_with(start.prefix)))
)
other.num.log <- raw.dataframe %>%
filter(Group != "mix" & Group != "sample") %>%
dplyr::select(Samples:Plate) %>%
bind_cols(
raw.dataframe %>%
filter(Group != "mix" & Group != "sample") %>%
dplyr::select(starts_with(start.prefix)) %>%
replace_na(replace = other.min) %>%
log2()
)
all.noNA <- smpls.noNA %>%
bind_rows(QC.noNA) %>%
bind_rows(other.num.log)
}
HeatmapPrepAlt <- function(raw.data, start.prefix){
# function for preparing dara for heatmap viz
x <- raw.data %>%
select(starts_with(start.prefix)) %>%
scale(center = TRUE, scale = TRUE) %>%
as.matrix()
row.names(x) <- raw.data$Samples
return(x)
}
Q: What are the distributions of compound masses and retention times?
full.vf.cmpnd <- vf.cell.neg.compound.info %>%
mutate(Set = "Cells / Neg") %>%
bind_rows(vf.cell.pos.compound.info %>% mutate(Set = "Cells / Pos")) %>%
bind_rows(vf.med.neg.compound.info %>% mutate(Set = "Media / Neg")) %>%
bind_rows(vf.med.pos.compound.info %>% mutate(Set = "Media / Pos"))
full.vf.cmpnd %>%
ggplot(aes(x = rt, y = mass)) +
geom_point(size = 3, alpha = 0.3) +
xlab("Retention Time (min)") +
ylab("Mass (Da)") +
ggtitle("Mass v RT\nVPA + FA HILIC") +
ylim(0, 1000)
full.vf.cmpnd %>%
ggplot(aes(x = rt, y = mass, color = Set)) +
geom_point(size = 3, alpha = 0.8) +
xlab("Retnetion Time (min)") +
ylab("Mass (Da)") +
ggtitle("Mass v RT\nVPA + FA HILIC") +
facet_wrap(~ Set) +
ylim(0, 1000)
Q: Which compounds were found in one or more of the data types?
## cell join ##
vf.cell.cmpnd.join <- vf.cell.neg.compound.info %>%
inner_join(vf.cell.pos.compound.info, by = "cas_id", suffix = c(".c.n", ".c.p")) %>%
select(
contains("cas_id"), contains("short"),
contains("full"), starts_with("formula"),
starts_with("mass"), starts_with("rt")
)
# compound names - found in pos and neg mode / cells
print(vf.cell.cmpnd.join$compound_full.c.n)
[1] "Glycine"
[2] "Pyruvate"
[3] "Alanine"
[4] "Beta-Alanine"
[5] "Sarcosine"
[6] "2-Aminobutyric Acid"
[7] "BAIBA"
[8] "GABA"
[9] "Serine"
[10] "Hypotaurine"
[11] "Uracil"
[12] "Creatinine"
[13] "Proline"
[14] "Valine"
[15] "Threonine"
[16] "Homoserine"
[17] "Taurine"
[18] "Ketoleucine"
[19] "N-Acetylalanine"
[20] "Creatine"
[21] "Leucine"
[22] "Isoleucine"
[23] "Asparagine"
[24] "Ornithine"
[25] "Aspartic Acid"
[26] "Adenine"
[27] "Glutamine"
[28] "Lysine"
[29] "Glutamic Acid"
[30] "Methionine"
[31] "Xanthine"
[32] "4-Hydroxyphenylacetic Acid"
[33] "3-Sulfinoalanine"
[34] "Histidine"
[35] "Allantoin"
[36] "5-Hydroxylysine"
[37] "Phenylalanine"
[38] "Pyridoxal"
[39] "Pyridoxine"
[40] "Glycerol 2-Phosphate"
[41] "Arginine"
[42] "Tyrosine"
[43] "D-Galactitol"
[44] "D-Sorbitol"
[45] "Phosphocholine"
[46] "N-alpha-Acetyl-L-glutamine"
[47] "Tryptophan"
[48] "Pantothenic Acid"
[49] "Cystathionine"
[50] "Methyl Jasmonate"
[51] "Carnosine"
[52] "Cytidine"
[53] "Uridine"
[54] "D-Glucose 6-phosphate"
[55] "D-Fructose 6-phosphate"
[56] "Thiamine (Vit B1)"
[57] "Inosine"
[58] "Guanosine"
[59] "Ophthalmic Acid"
[60] "5'-Methylthioadenosine"
[61] "N-Acetylaspartyl Glutamic Acid"
[62] "Glutathione (GSH)"
[63] "N-Acetylneuraminic Acid"
[64] "UMP"
[65] "3-Phosphoglyceroinositol"
[66] "AMP"
[67] "S-Adenosylhomocysteine (SAH)"
[68] "CDP"
[69] "ADP"
[70] "GDP"
[71] "UTP"
[72] "ATP"
[73] "GTP"
[74] "Cyclic adenosine diphosphate ribose (cADP-ribose)"
[75] "UDP-N-Acetylgalactosamine"
[76] "GSSG"
[77] "NAD"
[78] "NADH"
[79] "NADP"
[80] "Flavin adenine dinucleotide (FAD)"
[81] "Acetyl-CoA"
# percent of cell / neg compounds found in cell / pos
round(nrow(vf.cell.cmpnd.join) * 100 / nrow(vf.cell.neg.compound.info), 1)
[1] 58.3
# percent of cell / neg compounds found in cell / pos
round(nrow(vf.cell.cmpnd.join) * 100 / nrow(vf.cell.pos.compound.info), 1)
[1] 56.2
vf.cell.cmpnd.join %>%
select(contains("mass")) %>%
ggpairs()
vf.cell.cmpnd.join %>%
select(starts_with("rt")) %>%
ggpairs()
### Media join ###
vf.med.cmpnd.join <- vf.med.neg.compound.info %>%
inner_join(vf.med.pos.compound.info, by = "cas_id", suffix = c(".m.n", ".m.p")) %>%
select(
contains("cas_id"), contains("short"),
contains("full"), starts_with("formula"),
starts_with("mass"), starts_with("rt")
)
# compound names - found in pos and neg mode / media
print(vf.med.cmpnd.join$compound_full.m.n)
[1] "Alanine" "Serine" "Creatinine"
[4] "Proline" "Valine" "Threonine"
[7] "Taurine" "Creatine" "Leucine"
[10] "Isoleucine" "Glutamine" "Lysine"
[13] "Glutamic Acid" "Methionine" "Histidine"
[16] "Allantoin" "Phenylalanine" "Pyridoxine"
[19] "Arginine" "Citrulline" "Tyrosine"
[22] "D-Sorbitol" "Tryptophan" "Pantothenic Acid"
[25] "Thiamine (Vit B1)"
# percent of media / neg compounds found in media / pos
round(nrow(vf.med.cmpnd.join) * 100 / nrow(vf.med.neg.compound.info), 1)
[1] 44.6
# percent of media / pos compounds found in media / neg
round(nrow(vf.med.cmpnd.join) * 100 / nrow(vf.med.pos.compound.info), 1)
[1] 53.2
# vf all match
vf.all.cmpnd.join <- vf.cell.cmpnd.join %>%
inner_join(vf.med.cmpnd.join, by = "cas_id") %>%
select(
contains("cas_id"), contains("short"),
contains("full"), starts_with("formula"),
starts_with("mass"), starts_with("rt")
)
nrow(vf.all.cmpnd.join)
[1] 24
vf.all.cmpnd.join %>%
select(contains("mass")) %>%
ggpairs()
vf.all.cmpnd.join %>%
select(starts_with("rt")) %>%
ggpairs()
Q: Do any of the samples have greater than 20% missing (NA) compound abundances, out of all of the features in the dataset?
MissingPerSamplePlot(vf.cell.neg.raw, "hVPA_FAnC") +
ggtitle("Missing Per Sample\nVPA + FA HILIC / Cells / Neg Mode")
MissingPerSamplePlot(vf.cell.pos.raw, "hVPA_FApC") +
ggtitle("Missing Per Sample\nVPA + FA HILIC / Cells / Pos Mode")
MissingPerSamplePlot(vf.med.neg.raw, "hVPA_FAnM") +
ggtitle("Missing Per Sample\nVPA + FA HILIC / Media / Neg Mode")
MissingPerSamplePlot(vf.med.pos.raw, "hVPA_FApM") +
ggtitle("Missing Per Sample\nVPA + FA HILIC / Media / Pos Mode")
Q: Are any of the compounds more than 20% missing in the experimental sample group? If there are any, they will be excluded from analysis.
(vf.cell.neg.cmpnd.to.excl <- MissingPerCompound(vf.cell.neg.raw, "hVPA_FAnC") %>%
filter(percent_na > 20))
# A tibble: 5 x 4
Compound na_count n_samples percent_na
<chr> <int> <int> <dbl>
1 hVPA_FAnC139 14 22 63.6
2 hVPA_FAnC72 8 22 36.4
3 hVPA_FAnC113 7 22 31.8
4 hVPA_FAnC138 5 22 22.7
5 hVPA_FAnC64 5 22 22.7
MissingPerCompound(vf.cell.pos.raw, "hVPA_FApC") %>%
filter(percent_na > 20)
# A tibble: 0 x 4
# ... with 4 variables: Compound <chr>, na_count <int>, n_samples <int>,
# percent_na <dbl>
MissingPerCompound(vf.med.neg.raw, "hVPA_FAnM") %>%
filter(percent_na > 20)
# A tibble: 0 x 4
# ... with 4 variables: Compound <chr>, na_count <int>, n_samples <int>,
# percent_na <dbl>
MissingPerCompound(vf.med.pos.raw, "hVPA_FApM") %>%
filter(percent_na > 20)
# A tibble: 0 x 4
# ... with 4 variables: Compound <chr>, na_count <int>, n_samples <int>,
# percent_na <dbl>
vf.cell.neg.raw.grp.mean <- vf.cell.neg.raw %>%
group_by(Group) %>%
summarize_at(vars(matches("hVPA_FAnC")), mean, na.rm = TRUE) %>%
gather(key = "Compound", value = "Grp_mean_abun", -Group)
vf.cell.neg.raw.grp.mean %>%
ggplot(aes(log2(Grp_mean_abun), color = Group)) +
geom_density(size = 2, alpha = 0.8) +
ggtitle("Distribution of compound means\nVPA + FA HILIC / Cells / Negative Mode\nGrouped by sample type")
vf.cell.neg.raw.grp.mean.order <- vf.cell.neg.raw.grp.mean %>%
filter(Group == "sample") %>%
arrange(Grp_mean_abun)
vf.cell.neg.raw %>%
select(Samples, Group, starts_with("hVPA_FAnC")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.cell.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 1) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
geom_line(
data = vf.cell.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.cell.neg.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 0.5
) +
ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nVPA + FA HILIC/ Cells / Negative Mode")
vf.cell.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.cell.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group)) +
geom_point(size = 1, alpha = 0.8) +
geom_line(alpha = 0.8) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
ylab("log2(Sample Type Mean)") +
ggtitle("Profile Plot of compound means by sample type only\nVPA + FA HILIC / Cells / Negative Mode")
vf.cell.neg.raw.grp.diff <- vf.cell.neg.raw.grp.mean %>%
spread(Group, Grp_mean_abun) %>%
mutate(
smpl_solv_diff = sample / solv,
)
vf.cell.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_solv_diff))) +
geom_histogram(bins = 50)
vf.cell.neg.cmpnd.to.incl <- vf.cell.neg.raw.grp.diff %>%
filter(smpl_solv_diff > 2.5 | is.na(smpl_solv_diff)) %>%
filter(!(Compound %in% vf.cell.neg.cmpnd.to.excl$Compound))
# original number of metabolites
nrow(vf.cell.neg.raw.grp.diff)
[1] 139
# number of metabolites after filtering
nrow(vf.cell.neg.cmpnd.to.incl)
[1] 132
vf.cell.pos.raw.grp.mean <- vf.cell.pos.raw %>%
group_by(Group) %>%
summarize_at(vars(matches("hVPA_FApC")), mean, na.rm = TRUE) %>%
gather(key = "Compound", value = "Grp_mean_abun", -Group)
vf.cell.pos.raw.grp.mean %>%
ggplot(aes(log2(Grp_mean_abun), color = Group)) +
geom_density(size = 2, alpha = 0.8) +
ggtitle("Distribution of compound means\nVPA + FA HILIC / Cells / Positive Mode\nGrouped by sample type")
vf.cell.pos.raw.grp.mean.order <- vf.cell.pos.raw.grp.mean %>%
filter(Group == "sample") %>%
arrange(Grp_mean_abun)
vf.cell.pos.raw %>%
select(Samples, Group, starts_with("hVPA_FApC")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.cell.pos.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 1) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
geom_line(
data = vf.cell.pos.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.cell.pos.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 0.5
) +
ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nVPA + FA HILIC / Cells / Positive Mode")
vf.cell.pos.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.cell.pos.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group)) +
geom_point(size = 1, alpha = 0.8) +
geom_line(alpha = 0.8) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
ylab("log2(Sample Type Mean)") +
ggtitle("Profile Plot of compound means by sample type only\nVPA + FA HILIC / Cells / Positive Mode")
vf.cell.pos.raw.grp.diff <- vf.cell.pos.raw.grp.mean %>%
spread(Group, Grp_mean_abun) %>%
mutate(
smpl_solv_diff = sample / solv
)
vf.cell.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_solv_diff))) +
geom_histogram(bins = 50)
# include compounds with FC > 2.5 or FC is NA (indication of NA in solv)
vf.cell.pos.cmpnd.to.incl <- vf.cell.pos.raw.grp.diff %>%
filter(smpl_solv_diff > 2.5 | is.na(smpl_solv_diff))
# original number of metabolites
nrow(vf.cell.pos.raw.grp.diff)
[1] 144
# filtered number
nrow(vf.cell.pos.cmpnd.to.incl)
[1] 142
vf.med.neg.raw.grp.mean <- vf.med.neg.raw %>%
group_by(Group) %>%
summarize_at(vars(matches("hVPA_FAnM")), mean, na.rm = TRUE) %>%
gather(key = "Compound", value = "Grp_mean_abun", -Group)
vf.med.neg.raw.grp.mean %>%
ggplot(aes(log2(Grp_mean_abun), color = Group)) +
geom_density(size = 2, alpha = 0.8) +
ggtitle("Distribution of compound means\nVPA + FA / Media HILIC / Negative Mode\nGrouped by sample type")
vf.med.neg.raw.grp.mean.order <- vf.med.neg.raw.grp.mean %>%
filter(Group == "sample") %>%
arrange(Grp_mean_abun)
vf.med.neg.raw %>%
select(Samples, Group, starts_with("hVPA_FAnM")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.med.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 2) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
geom_line(
data = vf.med.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.med.neg.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 0.5
) +
ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nVPA + FA HILIC / Media / Negative Mode")
vf.med.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.med.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group)) +
geom_point(size = 1, alpha = 0.8) +
geom_line(alpha = 0.8) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
ylab("log2(Sample Type Mean)") +
ggtitle("Profile Plot of compound means by sample type only\nVPA + FA HILIC / Media / Negative Mode")
vf.med.neg.raw.grp.diff <- vf.med.neg.raw.grp.mean %>%
spread(Group, Grp_mean_abun) %>%
mutate(
smpl_solv_diff = sample / solv
)
vf.med.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_solv_diff))) +
geom_histogram(bins = 25)
# include compounds with FC > 2.5 or FC is NA (indication of NA in solv)
vf.med.neg.cmpnd.to.incl <- vf.med.neg.raw.grp.diff %>%
filter(smpl_solv_diff > 2.5 | is.na(smpl_solv_diff))
nrow(vf.med.neg.raw.grp.diff)
[1] 56
nrow(vf.med.neg.cmpnd.to.incl)
[1] 52
vf.med.pos.raw.grp.mean <- vf.med.pos.raw %>%
group_by(Group) %>%
summarize_at(vars(matches("hVPA_FApM")), mean, na.rm = TRUE) %>%
gather(key = "Compound", value = "Grp_mean_abun", -Group)
vf.med.pos.raw.grp.mean %>%
ggplot(aes(log2(Grp_mean_abun), color = Group)) +
geom_density(size = 2, alpha = 0.8) +
ggtitle("Distribution of compound means\nVPA + FA HILIC / Media / Positive Mode\nGrouped by sample type")
vf.med.pos.raw.grp.mean.order <- vf.med.pos.raw.grp.mean %>%
filter(Group == "sample") %>%
arrange(Grp_mean_abun)
vf.med.pos.raw %>%
select(Samples, Group, starts_with("hVPA_FApM")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.med.pos.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 2) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
geom_line(
data = vf.med.pos.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.med.pos.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 0.5
) +
ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nVPA + FA HILIC / Media / Positive Mode")
vf.med.pos.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vf.med.pos.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group)) +
geom_point(size = 1, alpha = 0.8) +
geom_line(alpha = 0.8) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
ylab("log2(Sample Type Mean)") +
ggtitle("Profile Plot of compound means by sample type only\nVPA + FA HILIC / Media / Positive Mode")
vf.med.pos.raw.grp.diff <- vf.med.pos.raw.grp.mean %>%
spread(Group, Grp_mean_abun) %>%
mutate(
smpl_solv_diff = sample / solv
)
vf.med.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_solv_diff))) +
geom_histogram(bins = 25)
# include compounds with FC > 2.5 or FC is NA (indication of NA in solv)
vf.med.pos.cmpnd.to.incl <- vf.med.pos.raw.grp.diff %>%
filter(smpl_solv_diff > 2.5 | is.na(smpl_solv_diff))
nrow(vf.med.pos.raw.grp.diff)
[1] 47
nrow(vf.med.pos.cmpnd.to.incl)
[1] 47
vf.cell.neg.noNA <- vf.cell.neg.raw %>%
select(Samples:Plate, one_of(vf.cell.neg.cmpnd.to.incl$Compound)) %>%
ReplaceNAwMinLogTransformMult("hVPA_FAnC")
vf.cell.pos.noNA <- vf.cell.pos.raw %>%
select(Samples:Plate, one_of(vf.cell.pos.cmpnd.to.incl$Compound)) %>%
ReplaceNAwMinLogTransformMult("hVPA_FApC")
vf.med.neg.noNA <- vf.med.neg.raw %>%
select(Samples:Plate, one_of(vf.med.neg.cmpnd.to.incl$Compound)) %>%
ReplaceNAwMinLogTransformMult("hVPA_FAnM")
vf.med.pos.noNA <- vf.med.pos.raw %>%
select(Samples:Plate, one_of(vf.med.pos.cmpnd.to.incl$Compound)) %>%
ReplaceNAwMinLogTransformMult("hVPA_FApM")
vf.cell.neg.noNA.gathered <- vf.cell.neg.noNA %>%
gather(
key = "Metabolite", "Abundance",
which(colnames(vf.cell.neg.noNA) == "hVPA_FAnC10"):ncol(vf.cell.neg.noNA)
)
# plot all abundances in a sample, grouped by sample as a boxplot
vf.cell.neg.noNA.gathered %>%
ggplot(aes(Samples, Abundance, fill = Group)) +
geom_boxplot() +
geom_boxplot(aes(color = Group), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 90)) +
ylab("log2(Abundance)") +
ggtitle("Boxplot of compound abundances\nAll samples\nVPA + FA HILIC / Cells / Negative Mode")
# same data format, but as ridge plots
vf.cell.neg.noNA.gathered %>%
ggplot(aes(y = Samples, x = Abundance, fill = Group)) +
geom_density_ridges(scale = 15) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nAll samples\nVPA + FA HILIC/ Cells / Negative Mode")
# experimental samples only
vf.cell.neg.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(y = Samples, x = Abundance, fill = Treatment)) +
geom_density_ridges(scale = 10) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nExperimental samples only\nVPA + FA HILIC / Cells / Negative Mode")
# overlay the distributions for another look
vf.cell.neg.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75) +
xlab("log2(Abundance)") +
ggtitle("Density plot of compound abundances\nExperimental samples only\nVPA + FA HILIC / Cells / Negative Mode")
vf.cell.pos.noNA.gathered <- vf.cell.pos.noNA %>%
gather(
key = "Metabolite", "Abundance",
which(colnames(vf.cell.pos.noNA) == "hVPA_FApC1"):ncol(vf.cell.pos.noNA)
)
vf.cell.pos.noNA.gathered %>%
ggplot(aes(Samples, Abundance, fill = Group)) +
geom_boxplot() +
geom_boxplot(aes(color = Group), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 90)) +
ylab("log2(Abundance)") +
ggtitle("Boxplot of compound abundances\nAll samples\nVPA + FA HILIC / Cells / Positive Mode")
vf.cell.pos.noNA.gathered %>%
ggplot(aes(y = Samples, x = Abundance, fill = Group)) +
geom_density_ridges(scale = 15) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nAll samples\nVPA + FA HILIC / Cells / Positive Mode")
vf.cell.pos.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(y = Samples, x = Abundance, fill = Treatment)) +
geom_density_ridges(scale = 10) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nExperimental samples only\nVPA + FA HILIC / Cells / Positive Mode")
vf.cell.pos.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75) +
xlab("log2(Abundance)") +
ggtitle("Density plot of compound abundances\nExperimental samples only\nVPA + FA HILIC / Cells / Positive Mode")
vf.cell.pos.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75, trim = TRUE, adjust = 3/4) +
xlab("log2(Abundance)") +
facet_wrap(~ Treatment)
vf.med.neg.noNA.gathered <- vf.med.neg.noNA %>%
gather(
key = "Metabolite", "Abundance",
which(colnames(vf.med.neg.noNA) == "hVPA_FAnM10"):ncol(vf.med.neg.noNA)
)
vf.med.neg.noNA.gathered %>%
ggplot(aes(Samples, Abundance, fill = Group)) +
geom_boxplot() +
geom_boxplot(aes(color = Group), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 90)) +
ylab("log2(Abundance)") +
ggtitle("Boxplot of compound abundances\nAll samples\nVPA + FA HILIC / Media / Negative Mode")
vf.med.neg.noNA.gathered %>%
ggplot(aes(y = Samples, x = Abundance, fill = Group)) +
geom_density_ridges(scale = 15) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nAll samples\nVPA + FA HILIC / Media / Negative Mode")
vf.med.neg.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(y = Samples, x = Abundance, fill = Treatment)) +
geom_density_ridges(scale = 10) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nExperimental samples only\nVPA + FA HILIC / Media / Negative Mode")
vf.med.neg.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75) +
xlab("log2(Abundance)") +
ggtitle("Density plot of compound abundances\nExperimental samples only\nVPA + FA HILIC / Media / Negative Mode")
vf.med.pos.noNA.gathered <- vf.med.pos.noNA %>%
gather(
key = "Metabolite", "Abundance",
which(colnames(vf.med.pos.noNA) == "hVPA_FApM1"):ncol(vf.med.pos.noNA)
)
vf.med.pos.noNA.gathered %>%
ggplot(aes(Samples, Abundance, fill = Group)) +
geom_boxplot() +
geom_boxplot(aes(color = Group), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 90)) +
ylab("log2(Abundance)") +
ggtitle("Boxplot of compound abundances\nAll samples\nVPA + FA HILIC / Media / Positive Mode")
vf.med.pos.noNA.gathered %>%
ggplot(aes(y = Samples, x = Abundance, fill = Group)) +
geom_density_ridges(scale = 15) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nAll samples\nVPA + FA HILIC / Media / Positive Mode")
vf.med.pos.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(y = Samples, x = Abundance, fill = Treatment)) +
geom_density_ridges(scale = 10) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nExperimental samples only\nVPA + FA HILIC / Media / Positive Mode")
vf.med.pos.noNA.gathered %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75) +
xlab("log2(Abundance)") +
ggtitle("Density plot of compound abundances\nExperimental samples only\nVPA + FA HILIC / Media / Positive Mode")
Some plots to understand the relationship between the samples, QC samples, solvent, and empty samples in some cases.
### PCA on all Samples ###
vf.cell.neg.full.pca <- vf.cell.neg.noNA %>%
select(starts_with("hVPA_FAnC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.cell.neg.full.pca$sdev ^ 2) * 100 / sum(vf.cell.neg.full.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nAll samples only\nVPA + FA HILIC / Cells / Negative Mode",
type = "b"
)
vf.cell.neg.full.pca.x <- as.data.frame(vf.cell.neg.full.pca$x)
row.names(vf.cell.neg.full.pca.x) <- vf.cell.neg.noNA$Samples
vf.cell.neg.full.pca.x <- vf.cell.neg.full.pca.x %>%
bind_cols(vf.cell.neg.noNA %>% select(Group:Plate))
vf.cell.neg.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (96.4% Var)") +
ylab("PC2 (14.5% Var)") +
ggtitle("Principal Component Analysis\nAll Samples\nVPA + FA HILIC / Cells / Negative Mode")
### Samples and mix ###
vf.cell.neg.smpl.mix.pca <- vf.cell.neg.noNA %>%
filter(Group == "sample" | Group == "mix") %>%
select(starts_with("hVPA_FAnC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.cell.neg.smpl.mix.pca$sdev ^ 2) * 100 / sum(vf.cell.neg.smpl.mix.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nSamples and Mix\nVPA + FA HILIC / Cells / Negative Mode",
type = "b"
)
vf.cell.neg.smpl.mix.pca.x <- as.data.frame(vf.cell.neg.smpl.mix.pca$x)
vf.cell.neg.smpl.mix.pca.x <- vf.cell.neg.smpl.mix.pca.x %>%
bind_cols(
vf.cell.neg.noNA %>%
filter(Group == "sample" | Group == "mix") %>%
select(Samples, Group:Plate)
)
row.names(vf.cell.neg.smpl.mix.pca.x) <- vf.cell.neg.smpl.mix.pca.x$Samples
vf.cell.neg.smpl.mix.pca.x %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (63.1% Var)") +
ylab("PC2 (14.0% Var)") +
ggtitle("Principal Component Analysis\nSamples and Mix\nVPA + FA HILIC / Cells / Negative Mode")
vf.cell.neg.smpl.mix.pca.x %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (6.2% Var)") +
ylab("PC4 (4.7% Var)") +
ggtitle("Principal Component Analysis\nSamples and mix\nVPA + FA HILIC / Cells / Negative Mode")
### Experimental Samples Only ###
vf.cell.neg.smpl.pca <- vf.cell.neg.noNA %>%
filter(Group == "sample") %>%
select(starts_with("hVPA_FAnC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.cell.neg.smpl.pca$sdev ^ 2) * 100 / sum(vf.cell.neg.smpl.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA + FA HILIC / Cells / Negative Mode",
type = "b"
)
vf.cell.neg.smpl.pca.x <- as.data.frame(vf.cell.neg.smpl.pca$x)
vf.cell.neg.smpl.pca.x <- vf.cell.neg.smpl.pca.x %>%
bind_cols(
vf.cell.neg.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Plate)
)
row.names(vf.cell.neg.smpl.pca.x) <- vf.cell.neg.smpl.pca.x$Samples
vf.cell.neg.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = VPA, shape = FA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (65.3% Var)") +
ylab("PC2 (14.6% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA HILIC / Cells / Negative Mode")
vf.cell.neg.smpl.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = VPA, shape = FA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (5.7% Var)") +
ylab("PC4 (4.2% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA HILIC/ Cells / Negative Mode")
vf.cell.pos.full.pca <- vf.cell.pos.noNA %>%
select(starts_with("hVPA_FApC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.cell.pos.full.pca$sdev ^ 2) * 100 / sum(vf.cell.pos.full.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nAll samples only\nVPA + FA HILIC / Cells / Positive Mode",
type = "b"
)
vf.cell.pos.full.pca.x <- as.data.frame(vf.cell.pos.full.pca$x)
row.names(vf.cell.pos.full.pca.x) <- vf.cell.pos.noNA$Samples
vf.cell.pos.full.pca.x <- vf.cell.pos.full.pca.x %>%
bind_cols(vf.cell.pos.noNA %>% select(Group:Plate))
vf.cell.pos.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (92.4% Var)") +
ylab("PC2 (2.7% Var)") +
ggtitle("Principal Component Analysis\nAll Samples\nVPA + FA HILIC / Cells / Positive Mode")
### Samples and Mix ###
vf.cell.pos.smpl.mix.pca <- vf.cell.pos.noNA %>%
filter(Group == "sample" | Group == "mix") %>%
select(starts_with("hVPA_FApC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.cell.pos.smpl.mix.pca$sdev ^ 2) * 100 / sum(vf.cell.pos.smpl.mix.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nSamples and Mix\nVPA + FA HILIC / Cells / Positive Mode",
type = "b"
)
vf.cell.pos.smpl.mix.pca.x <- as.data.frame(vf.cell.pos.smpl.mix.pca$x)
vf.cell.pos.smpl.mix.pca.x <- vf.cell.pos.smpl.mix.pca.x %>%
bind_cols(
vf.cell.pos.noNA %>%
filter(Group == "sample" | Group == "mix") %>%
select(Samples, Group:Plate)
)
row.names(vf.cell.pos.smpl.mix.pca.x) <- vf.cell.pos.smpl.mix.pca.x$Samples
vf.cell.pos.smpl.mix.pca.x %>%
unite("Treatment", VPA:FA, sep = "_", remove = FALSE) %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment, shape = VPA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (62.9% Var)") +
ylab("PC2 (13.6% Var)") +
ggtitle("Principal Component Analysis\nSamples and Mix\nVPA + FA / Cells / Positive Mode")
vf.cell.pos.smpl.mix.pca.x %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (7.2% Var)") +
ylab("PC4 (3.8% Var)") +
ggtitle("Principal Component Analysis\nSamples and Mix\nVPA + FA HILIC / Cells / Positive Mode")
### Experimental Samples Only ###
vf.cell.pos.smpl.pca <- vf.cell.pos.noNA %>%
filter(Group == "sample") %>%
select(starts_with("hVPA_FApC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.cell.pos.smpl.pca$sdev ^ 2) * 100 / sum(vf.cell.pos.smpl.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA + FA HILIC/ Cells / Positive Mode",
type = "b"
)
vf.cell.pos.smpl.pca.x <- as.data.frame(vf.cell.pos.smpl.pca$x)
vf.cell.pos.smpl.pca.x <- vf.cell.pos.smpl.pca.x %>%
bind_cols(
vf.cell.pos.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Plate)
)
row.names(vf.cell.pos.smpl.pca.x) <- vf.cell.pos.smpl.pca.x$Samples
vf.cell.pos.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = VPA, shape = FA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (61.2% Var)") +
ylab("PC2 (15.2% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA HILIC / Cells / Positive Mode")
vf.cell.pos.smpl.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = VPA, shape = FA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (7.7% Var)") +
ylab("PC4 (4.3% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA HILIC / Cells / Positive Mode")
### PCA on all Samples ###
vf.med.neg.full.pca <- vf.med.neg.noNA %>%
select(starts_with("hVPA_FAnM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.med.neg.full.pca$sdev ^ 2) * 100 / sum(vf.med.neg.full.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nAll samples only\nVPA + FA / Media / Negative Mode",
type = "b"
)
vf.med.neg.full.pca.x <- as.data.frame(vf.med.neg.full.pca$x)
row.names(vf.med.neg.full.pca.x) <- vf.med.neg.noNA$Samples
vf.med.neg.full.pca.x <- vf.med.neg.full.pca.x %>%
bind_cols(vf.med.neg.noNA %>% select(Group:Plate))
vf.med.neg.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (89.8% Var)") +
ylab("PC2 (4.7% Var)") +
ggtitle("Principal Component Analysis\nAll Samples\nVPA + FA / Media / Negative Mode")
### Samples and Mix ###
vf.med.neg.smpl.mix.pca <- vf.med.neg.noNA %>%
filter(Group == "sample" | Group == "mix") %>%
select(starts_with("hVPA_FAnM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.med.neg.smpl.mix.pca$sdev ^ 2) * 100 / sum(vf.med.neg.smpl.mix.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nSamples and Mix\nVPA + FA / Media / Negative Mode",
type = "b"
)
vf.med.neg.smpl.mix.pca.x <- as.data.frame(vf.med.neg.smpl.mix.pca$x)
vf.med.neg.smpl.mix.pca.x <- vf.med.neg.smpl.mix.pca.x %>%
bind_cols(
vf.med.neg.noNA %>%
filter(Group == "sample" | Group == "mix") %>%
select(Samples, Group:Plate)
)
row.names(vf.med.neg.smpl.mix.pca.x) <- vf.med.neg.smpl.mix.pca.x$Samples
vf.med.neg.smpl.mix.pca.x %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (67.1% Var)") +
ylab("PC2 (19.6% Var)") +
ggtitle("Principal Component Analysis\nSamples and Mix\nVPA + FA / Media / Negative Mode")
vf.med.neg.smpl.mix.pca.x %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (4.3% Var)") +
ylab("PC4 (2.1% Var)") +
ggtitle("Principal Component Analysis\nSamples and Mix\nVPA + FA / Media / Negative Mode")
### Experimental Samples Only ###
vf.med.neg.smpl.pca <- vf.med.neg.noNA %>%
filter(Group == "sample") %>%
select(starts_with("hVPA_FAnM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.med.neg.smpl.pca$sdev ^ 2) * 100 / sum(vf.med.neg.smpl.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA + FA / Media / Negative Mode",
type = "b"
)
vf.med.neg.smpl.pca.x <- as.data.frame(vf.med.neg.smpl.pca$x)
vf.med.neg.smpl.pca.x <- vf.med.neg.smpl.pca.x %>%
bind_cols(
vf.med.neg.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Plate)
)
row.names(vf.med.neg.smpl.pca.x) <- vf.med.neg.smpl.pca.x$Samples
vf.med.neg.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = FA, shape = VPA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (74.4% Var)") +
ylab("PC2 (12.1% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA / Media / Negative Mode")
vf.med.neg.smpl.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = FA, shape = VPA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (4.5% Var)") +
ylab("PC4 (2.2% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA / Media / Negative Mode")
### PCA on all Samples ###
vf.med.pos.full.pca <- vf.med.pos.noNA %>%
select(starts_with("hVPA_FApM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.med.pos.full.pca$sdev ^ 2) * 100 / sum(vf.med.pos.full.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nAll samples only\nVPA + FA / Media / Positive Mode",
type = "b"
)
vf.med.pos.full.pca.x <- as.data.frame(vf.med.pos.full.pca$x)
row.names(vf.med.pos.full.pca.x) <- vf.med.pos.noNA$Samples
vf.med.pos.full.pca.x <- vf.med.pos.full.pca.x %>%
bind_cols(vf.med.pos.noNA %>% select(Group:Plate))
vf.med.pos.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (88.6% Var)") +
ylab("PC2 (4.7% Var)") +
ggtitle("Principal Component Analysis\nAll Samples\nVPA + FA / Media / Positive Mode")
### Samples and mix ###
vf.med.pos.smpl.mix.pca <- vf.med.pos.noNA %>%
filter(Group == "sample" | Group == "mix") %>%
select(starts_with("hVPA_FApM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.med.pos.smpl.mix.pca$sdev ^ 2) * 100 / sum(vf.med.pos.smpl.mix.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nSamples and Mix\nVPA + FA / Media / Positive Mode",
type = "b"
)
vf.med.pos.smpl.mix.pca.x <- as.data.frame(vf.med.pos.smpl.mix.pca$x)
vf.med.pos.smpl.mix.pca.x <- vf.med.pos.smpl.mix.pca.x %>%
bind_cols(
vf.med.pos.noNA %>%
filter(Group == "sample" | Group == "mix") %>%
select(Samples, Group:Plate)
)
row.names(vf.med.pos.smpl.mix.pca.x) <- vf.med.pos.smpl.mix.pca.x$Samples
vf.med.pos.smpl.mix.pca.x %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (53.7% Var)") +
ylab("PC2 (18.9% Var)") +
ggtitle("Principal Component Analysis\nSamples and Mix\nVPA + FA / Media / Positive Mode")
vf.med.pos.smpl.mix.pca.x %>%
unite("Treatment", VPA:FA, sep = "_") %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (8.7% Var)") +
ylab("PC4 (6.0% Var)") +
ggtitle("Principal Component Analysis\nSamples and Mix\nVPA + FA / Media / Positive Mode")
### Experimental Samples Only ###
vf.med.pos.smpl.pca <- vf.med.pos.noNA %>%
filter(Group == "sample") %>%
select(starts_with("hVPA_FApM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vf.med.pos.smpl.pca$sdev ^ 2) * 100 / sum(vf.med.pos.smpl.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA + FA / Media / Positive Mode",
type = "b"
)
vf.med.pos.smpl.pca.x <- as.data.frame(vf.med.pos.smpl.pca$x)
vf.med.pos.smpl.pca.x <- vf.med.pos.smpl.pca.x %>%
bind_cols(
vf.med.pos.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Plate)
)
row.names(vf.med.pos.smpl.pca.x) <- vf.med.pos.smpl.pca.x$Samples
vf.med.pos.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = FA, shape = VPA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (53.8% Var)") +
ylab("PC2 (19.8% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA / Media / Positive Mode")
vf.med.pos.smpl.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = FA, shape = VPA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (8.8% Var)") +
ylab("PC4 (5.8% Var)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA + FA / Media / Positive Mode")
# sample prep
vf.cell.neg.smpl.data <- vf.cell.neg.noNA %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_")
vf.cell.neg.data.for.sva <- as.matrix(
vf.cell.neg.smpl.data[, which(
colnames(vf.cell.neg.smpl.data) == "hVPA_FAnC10"
):ncol(vf.cell.neg.smpl.data)]
)
row.names(vf.cell.neg.data.for.sva) <- vf.cell.neg.smpl.data$Samples
vf.cell.neg.data.for.sva <- t(vf.cell.neg.data.for.sva)
# pheno prep
vf.cell.neg.data.pheno <- as.data.frame(vf.cell.neg.smpl.data[, 5:6])
row.names(vf.cell.neg.data.pheno) <- vf.cell.neg.smpl.data$Samples
# sva calculation
vf.cell.neg.mod.vf <- model.matrix(~ 0 + as.factor(Treatment), data = vf.cell.neg.data.pheno)
vf.cell.neg.mod0 <- model.matrix(~ 1, data = vf.cell.neg.data.pheno)
set.seed(2018)
num.sv(vf.cell.neg.data.for.sva, vf.cell.neg.mod.vf, method = "be")
[1] 1
set.seed(2018)
num.sv(vf.cell.neg.data.for.sva, vf.cell.neg.mod.vf, method = "leek")
[1] 1
set.seed(2018)
vf.cell.neg.sv <- sva(vf.cell.neg.data.for.sva, vf.cell.neg.mod.vf, vf.cell.neg.mod0)
Number of significant surrogate variables is: 1
Iteration (out of 5 ):1 2 3 4 5
# extract the surrogate variables
vf.cell.neg.surr.var <- as.data.frame(vf.cell.neg.sv$sv)
colnames(vf.cell.neg.surr.var) <- c("S1")
plot(vf.cell.neg.smpl.pca.x$PC1, vf.cell.pos.smpl.pca.x$PC1)
plot(vf.cell.pos.smpl.pca.x$PC1, vf.cell.neg.surr.var$S1)
vf.cell.neg.covar <- vf.cell.neg.smpl.pca.x %>%
select(Samples:Plate, PC1:PC4) %>%
bind_cols(vf.cell.neg.surr.var)
vf.cell.neg.covar %>%
select(PC1:S1) %>%
ggcorr(label = TRUE)
vf.cell.neg.covar %>%
select(PC1:S1) %>%
ggpairs()
colnames(vf.cell.neg.mod.vf) <- c("fa_only", "cntrl", "fa_and_vpa", "vpa_only")
# combine the full model matrix and the surrogate variables into one
vf.cell.neg.design.sv <- cbind(vf.cell.neg.mod.vf, vf.cell.neg.surr.var)
vf.cell.neg.cont.mat <- makeContrasts(
vpa_lowFA = vpa_only - cntrl,
vpa_highFA = fa_and_vpa - fa_only,
FA_diff = (fa_and_vpa - fa_only) - (vpa_only - cntrl),
levels = c("fa_only", "cntrl", "fa_and_vpa", "vpa_only", "S1")
)
# fit the model/design matrix
vf.cell.neg.eb <- vf.cell.neg.data.for.sva %>%
lmFit(vf.cell.neg.design.sv) %>%
contrasts.fit(vf.cell.neg.cont.mat) %>%
eBayes()
# pull out the results for each metabolite for each comparison
vf.cell.neg.eb.tidy <- tidy(vf.cell.neg.eb) %>%
mutate(
adj_pval = p.adjust(p.value, method = "bonferroni"),
FC = 2 ^ estimate,
change_in_vpa = ifelse(FC < 1, "down", "up")
) %>%
rename(compound_short = gene)
# volcano plot
vf.cell.neg.eb.tidy %>%
ggplot(aes(estimate, -log10(adj_pval))) +
geom_point(size = 2, alpha = 0.5) +
geom_hline(linetype = "dashed", color = "#009E73", yintercept = -log10(0.05)) +
geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1.2)) +
geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1/1.2)) +
xlim(-2.5, 2.5) +
xlab("log2(FC)") +
ylab("-log10(adjusted p-value)") +
ggtitle("Volcano plot\nVPA + FA HILIC / Cells / Negative Mode")
# select statistically significant hits with a certain FC:
vf.cell.neg.hits <- vf.cell.neg.eb.tidy %>%
filter(adj_pval < 0.05 & (FC > 1.2 | FC < 1/1.2)) %>%
inner_join(vf.cell.neg.compound.info, by = "compound_short")
# how many metabolites are significant across the different contrats
table(vf.cell.neg.hits$term)
FA_diff vpa_highFA vpa_lowFA
0 14 13
# significant metabolites
sort(unique(vf.cell.neg.hits$compound_full))
[1] "3-Sulfinoalanine"
[2] "3,4-Dihydroxyphenylacetic Acid (DOPAC)"
[3] "ATP"
[4] "BAIBA"
[5] "Caprylic Acid"
[6] "Creatinine"
[7] "Cystathionine"
[8] "D-Galactitol"
[9] "D-Glucose 6-phosphate"
[10] "D-Sorbitol"
[11] "Docosahexaenoic Acid (22:6 n-3)"
[12] "GABA"
[13] "Glutamic Acid"
[14] "GSSG"
[15] "myo-Inositol"
[16] "N-Acetylglutamic Acid"
[17] "N-Acetylserine"
[18] "Oleic Acid"
[19] "Palmitoleic Acid"
[20] "Proline"
[21] "Tryptophan"
[22] "UDP-N-Acetylgalactosamine"
[23] "UTP"
#write_csv(vf.cell.neg.hits, path = "./results/vpa_fa_p5_hilic_cell_neg_hits.csv")
vf.cell.neg.hits.tally2 <- vf.cell.neg.hits %>%
group_by(compound_short, compound_full) %>%
count() %>%
filter(n == 2)
vf.cell.neg.lowFA.hits <- vf.cell.neg.hits %>%
filter(term == "vpa_lowFA" & !(compound_short %in% vf.cell.neg.hits.tally2$compound_short))
vf.cell.neg.highFA.hits <- vf.cell.neg.hits %>%
filter(term == "vpa_highFA" & !(compound_short %in% vf.cell.neg.hits.tally2$compound_short))
vf.cell.neg.both.hits <- vf.cell.neg.hits %>%
filter(compound_short %in% vf.cell.neg.hits.tally2$compound_short) %>%
arrange(compound_short, term)
vf.cell.neg.both.hits %>%
select(compound_full, term, FC) %>%
spread(key = "term", value = "FC") %>%
ggplot(aes(vpa_lowFA, vpa_highFA)) +
geom_point(size = 2, alpha = 0.8) +
geom_abline(intercept = 0, slope = 1, color = "blue", alpha = 0.8)
vf.cell.neg.both.hits %>%
select(compound_full, term, FC) %>%
spread(key = "term", value = "FC") %>%
mutate(diff = vpa_highFA - vpa_lowFA) %>%
arrange(diff)
# A tibble: 4 x 4
compound_full vpa_highFA vpa_lowFA diff
<chr> <dbl> <dbl> <dbl>
1 Proline 2.66 3.00 -0.338
2 Cystathionine 2.00 2.25 -0.253
3 D-Sorbitol 0.374 0.456 -0.0824
4 Docosahexaenoic Acid (22:6 n-3) 2.94 1.86 1.08
### Plotting ###
vf.cell.neg.gathered <- vf.cell.neg.noNA %>%
filter(Group == "sample") %>%
bind_cols(vf.cell.neg.surr.var) %>%
select(Samples, VPA, FA, S1, starts_with("hVPA_FAnC")) %>%
gather(key = "Compound", value = "Abundance", hVPA_FAnC10:hVPA_FAnC99)
vf.cell.neg.nested <- vf.cell.neg.gathered %>%
group_by(Compound) %>%
nest() %>%
mutate(model = map(data, ~lm(Abundance ~ S1, data = .))) %>%
mutate(augment_model = map(model, augment))
vf.cell.neg.modSV.resid <- vf.cell.neg.nested %>%
unnest(data, augment_model) %>%
select(Samples, VPA, FA, Compound, .resid) %>%
spread(Compound, .resid)
vf.cell.neg.modSV.resid %>%
select(Samples:FA, one_of(unique(vf.cell.neg.hits$compound_short))) %>%
HeatmapPrepAlt("hVPA_FAnC") %>%
t() %>%
heatmaply(
colors = viridis(n = 10, option = "magma"),
xlab = "Samples", ylab = "Compounds",
main = "Statistically significant compounds\nVPA + FA HILIC / Cells / Neg Mode",
# margins = c(50, 50, 75, 30),
k_col = 2, k_row = 2,
labRow = unique(vf.cell.neg.hits$compound_full)
)
### PCA - cleaned data ###
vf.cell.neg.modSV.pca <- vf.cell.neg.modSV.resid %>%
select(starts_with("hVPA_FAnC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
vf.cell.neg.modSV.pca.x <- as.data.frame(vf.cell.neg.modSV.pca$x)
row.names(vf.cell.neg.modSV.pca.x) <- vf.cell.neg.modSV.resid$Samples
vf.cell.neg.modSV.pca.x <- vf.cell.neg.modSV.pca.x %>%
bind_cols(vf.cell.neg.modSV.resid %>% select(VPA:FA))
vf.cell.neg.modSV.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = VPA, shape = FA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (35.3% Var)") +
ylab("PC2 (26.4% Var)")
vf.cell.neg.modSV.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = VPA, shape = FA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (11.6% Var)") +
ylab("PC4 (5.7% Var)")
vf.cell.pos.smpl.data <- vf.cell.pos.noNA %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_")
vf.cell.pos.data.for.sva <- as.matrix(
vf.cell.pos.smpl.data[, which(
colnames(vf.cell.pos.smpl.data) == "hVPA_FApC1"
):ncol(vf.cell.pos.smpl.data)]
)
row.names(vf.cell.pos.data.for.sva) <- vf.cell.pos.smpl.data$Samples
vf.cell.pos.data.for.sva <- t(vf.cell.pos.data.for.sva)
vf.cell.pos.data.pheno <- as.data.frame(vf.cell.pos.smpl.data[, 5:6])
row.names(vf.cell.pos.data.pheno) <- vf.cell.pos.smpl.data$Samples
vf.cell.pos.mod.vf <- model.matrix(~ 0 + as.factor(Treatment), data = vf.cell.pos.data.pheno)
vf.cell.pos.mod0 <- model.matrix(~ 1, data = vf.cell.pos.data.pheno)
set.seed(2018)
num.sv(vf.cell.pos.data.for.sva, vf.cell.pos.mod.vf, method = "be")
[1] 1
set.seed(2018)
num.sv(vf.cell.pos.data.for.sva, vf.cell.pos.mod.vf, method = "leek")
[1] 0
set.seed(2018)
vf.cell.pos.sv <- sva(vf.cell.pos.data.for.sva, vf.cell.pos.mod.vf, vf.cell.pos.mod0)
Number of significant surrogate variables is: 1
Iteration (out of 5 ):1 2 3 4 5
vf.cell.pos.surr.var <- as.data.frame(vf.cell.pos.sv$sv)
colnames(vf.cell.pos.surr.var) <- c("S1")
plot(vf.cell.pos.smpl.pca.x$PC1, vf.cell.pos.surr.var$S1)
vf.cell.pos.covar <- vf.cell.pos.smpl.pca.x %>%
select(Samples:Plate, PC1:PC4) %>%
bind_cols(vf.cell.pos.surr.var)
vf.cell.pos.covar %>%
select(PC1:S1) %>%
ggcorr(label = TRUE)
vf.cell.pos.covar %>%
select(PC1:S1) %>%
ggpairs()
colnames(vf.cell.pos.mod.vf) <- c("fa_only", "cntrl", "fa_and_vpa", "vpa_only")
vf.cell.pos.design.sv <- cbind(vf.cell.pos.mod.vf, vf.cell.pos.smpl.pca.x$PC1)
colnames(vf.cell.pos.design.sv)[5] <- "PC1"
vf.cell.pos.cont.mat <- makeContrasts(
vpa_lowFA = vpa_only - cntrl,
vpa_highFA = fa_and_vpa - fa_only,
FA_diff = (fa_and_vpa - fa_only) - (vpa_only - cntrl),
levels = c("fa_only", "cntrl", "fa_and_vpa", "vpa_only", "PC1")
)
vf.cell.pos.eb <- vf.cell.pos.data.for.sva %>%
lmFit(vf.cell.pos.design.sv) %>%
contrasts.fit(vf.cell.pos.cont.mat) %>%
eBayes()
vf.cell.pos.eb.tidy <- tidy(vf.cell.pos.eb) %>%
mutate(
adj_pval = p.adjust(p.value, method = "bonferroni"),
FC = 2 ^ estimate,
change_in_vpa = ifelse(FC < 1, "down", "up")
) %>%
rename(compound_short = gene)
vf.cell.pos.eb.tidy %>%
ggplot(aes(estimate, -log10(adj_pval))) +
geom_point(size = 2, alpha = 0.5) +
geom_hline(linetype = "dashed", color = "#009E73", yintercept = -log10(0.05)) +
geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1.2)) +
geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1/1.2)) +
#xlim(-2.5, 2.5) +
xlab("log2(FC)") +
ylab("-log10(adjusted p-value)") +
ggtitle("Volcano plot\nVPA + FA / Cells / Positive Mode")
vf.cell.pos.hits <- vf.cell.pos.eb.tidy %>%
filter(adj_pval < 0.05 & (FC > 1.2 | FC < 1/1.2)) %>%
inner_join(vf.cell.pos.compound.info, by = "compound_short")
table(vf.cell.pos.hits$term)
FA_diff vpa_highFA vpa_lowFA
0 11 23
sort(unique(vf.cell.pos.hits$compound_full))
[1] "2'-Deoxyadenosine" "Acetylputrescine"
[3] "Adenosine" "AMP"
[5] "Aspartic Acid" "CDP-Choline"
[7] "Cystathionine" "D-Fructose 6-phosphate"
[9] "D-Sorbitol" "dGMP"
[11] "GDP" "Glutathione (GSH)"
[13] "Glycerol-3-phosphocholine" "GMP"
[15] "Guanosine" "Homoserine"
[17] "Hypoxanthine" "Inosine"
[19] "N-Acetylaspartyl Glutamic Acid" "N-Acetylglucosamine"
[21] "N-Acetylmannosamine" "N-Methylnicotinate"
[23] "Nicotinamide" "Nicotinamide Mononucleotide"
[25] "Proline" "Pyridoxal"
[27] "UMP"
vf.cell.pos.hits
# A tibble: 34 x 14
compound_short term estimate statistic p.value lod adj_pval FC
<chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 hVPA_FApC102 vpa_~ 0.803 5.23 4.55e- 5 1.66 1.94e-2 1.74
2 hVPA_FApC103 vpa_~ 0.453 5.05 6.74e- 5 1.27 2.87e-2 1.37
3 hVPA_FApC105 vpa_~ -1.39 -7.86 1.94e- 7 7.19 8.27e-5 0.381
4 hVPA_FApC107 vpa_~ -1.36 -7.87 1.89e- 7 7.21 8.06e-5 0.390
5 hVPA_FApC113 vpa_~ -1.04 -5.16 5.35e- 5 1.50 2.28e-2 0.486
6 hVPA_FApC114 vpa_~ -1.44 -5.06 6.70e- 5 1.27 2.85e-2 0.369
7 hVPA_FApC117 vpa_~ -1.22 -5.36 3.41e- 5 1.95 1.45e-2 0.428
8 hVPA_FApC127 vpa_~ 0.880 5.14 5.56e- 5 1.46 2.37e-2 1.84
9 hVPA_FApC18 vpa_~ 1.40 13.1 4.54e-11 15.6 1.94e-8 2.64
10 hVPA_FApC23 vpa_~ 0.398 4.98 7.91e- 5 1.11 3.37e-2 1.32
# ... with 24 more rows, and 6 more variables: change_in_vpa <chr>,
# compound_full <chr>, formula <chr>, mass <dbl>, rt <dbl>, cas_id <chr>
#write_csv(vf.cell.pos.hits, path = "./results/vpa_fa_p5_hilic_cell_pos_hits.csv")
vf.cell.pos.hits.tally2 <- vf.cell.pos.hits %>%
group_by(compound_short, compound_full) %>%
count() %>%
filter(n == 2)
vf.cell.pos.lowFA.hits <- vf.cell.pos.hits %>%
filter(term == "vpa_lowFA" & !(compound_short %in% vf.cell.pos.hits.tally2$compound_short))
vf.cell.pos.highFA.hits <- vf.cell.pos.hits %>%
filter(term == "vpa_highFA" & !(compound_short %in% vf.cell.pos.hits.tally2$compound_short))
vf.cell.pos.both.hits <- vf.cell.pos.hits %>%
filter(compound_short %in% vf.cell.pos.hits.tally2$compound_short) %>%
arrange(compound_short, term)
vf.cell.pos.both.hits %>%
select(compound_full, term, FC) %>%
spread(key = "term", value = "FC") %>%
ggplot(aes(vpa_lowFA, vpa_highFA)) +
geom_point(size = 2, alpha = 0.8) +
geom_abline(intercept = 0, slope = 1, color = "blue", alpha = 0.8)
vf.cell.pos.both.hits %>%
select(compound_full, term, FC) %>%
spread(key = "term", value = "FC") %>%
mutate(diff = vpa_highFA - vpa_lowFA) %>%
arrange(diff)
# A tibble: 7 x 4
compound_full vpa_highFA vpa_lowFA diff
<chr> <dbl> <dbl> <dbl>
1 Proline 2.16 2.64 -0.485
2 Cystathionine 1.53 1.81 -0.283
3 D-Sorbitol 0.351 0.469 -0.118
4 Glycerol-3-phosphocholine 0.434 0.446 -0.0120
5 UMP 0.453 0.381 0.0717
6 Nicotinamide Mononucleotide 0.463 0.390 0.0730
7 CDP-Choline 2.28 1.84 0.437
### Plotting ###
vf.cell.pos.gathered <- vf.cell.pos.noNA %>%
filter(Group == "sample") %>%
bind_cols(vf.cell.pos.surr.var) %>%
select(Samples, VPA, FA, S1, starts_with("hVPA_FApC")) %>%
gather(key = "Compound", value = "Abundance", hVPA_FApC1:hVPA_FApC99)
vf.cell.pos.nested <- vf.cell.pos.gathered %>%
group_by(Compound) %>%
nest() %>%
mutate(model = map(data, ~lm(Abundance ~ S1, data = .))) %>%
mutate(augment_model = map(model, augment))
vf.cell.pos.modSV.resid <- vf.cell.pos.nested %>%
unnest(data, augment_model) %>%
select(Samples, VPA, FA, Compound, .resid) %>%
spread(Compound, .resid)
vf.cell.pos.modSV.resid %>%
select(Samples:FA, one_of(unique(vf.cell.pos.hits$compound_short))) %>%
HeatmapPrepAlt("hVPA_FApC") %>%
t() %>%
heatmaply(
colors = viridis(n = 10, option = "magma"),
xlab = "Samples", ylab = "Compounds",
main = "Statistically significant compounds\nVPA + FA HILIC / Cells / Positive Mode",
margins = c(50, 50, 75, 30),
k_col = 2, k_row = 2,
labRow = unique(vf.cell.pos.hits$compound_full)
)
### PCA - cleaned data ###
vf.cell.pos.modSV.pca <- vf.cell.pos.modSV.resid %>%
select(starts_with("hVPA_FApC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
vf.cell.pos.modSV.pca.x <- as.data.frame(vf.cell.pos.modSV.pca$x)
row.names(vf.cell.pos.modSV.pca.x) <- vf.cell.pos.modSV.resid$Samples
vf.cell.pos.modSV.pca.x <- vf.cell.pos.modSV.pca.x %>%
bind_cols(vf.cell.pos.modSV.resid %>% select(VPA:FA))
vf.cell.pos.modSV.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = VPA, shape = FA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (66.4% Var)") +
ylab("PC2 (12.5% Var)")
vf.cell.pos.modSV.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = VPA, shape = FA)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (8.1% Var)") +
ylab("PC4 (3.1% Var)")
# sample prep
vf.med.neg.smpl.data <- vf.med.neg.noNA %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_")
vf.med.neg.data.for.sva <- as.matrix(
vf.med.neg.smpl.data[, which(
colnames(vf.med.neg.smpl.data) == "hVPA_FAnM10"
):ncol(vf.med.neg.smpl.data)]
)
row.names(vf.med.neg.data.for.sva) <- vf.med.neg.smpl.data$Samples
vf.med.neg.data.for.sva <- t(vf.med.neg.data.for.sva)
# pheno prep
vf.med.neg.data.pheno <- as.data.frame(vf.med.neg.smpl.data[, 5:6])
row.names(vf.med.neg.data.pheno) <- vf.med.neg.smpl.data$Samples
# sva calculation
vf.med.neg.mod.vf <- model.matrix(~ 0 + as.factor(Treatment), data = vf.med.neg.data.pheno)
vf.med.neg.mod0 <- model.matrix(~ 1, data = vf.med.neg.data.pheno)
set.seed(2018)
num.sv(vf.med.neg.data.for.sva, vf.med.neg.mod.vf, method = "be")
[1] 0
set.seed(2018)
num.sv(vf.med.neg.data.for.sva, vf.med.neg.mod.vf, method = "leek")
[1] 0
set.seed(2018)
vf.med.neg.sv <- sva(vf.med.neg.data.for.sva, vf.med.neg.mod.vf, vf.med.neg.mod0)
No significant surrogate variables
# extract the surrogate variables
colnames(vf.med.neg.mod.vf) <- c("fa_only", "cntrl", "fa_and_vpa", "vpa_only")
# combine the full model matrix and the surrogate variables into one
vf.med.neg.design.sv <- vf.med.neg.mod.vf
vf.med.neg.cont.mat <- makeContrasts(
vpa_lowFA = vpa_only - cntrl,
vpa_highFA = fa_and_vpa - fa_only,
FA_diff = (fa_and_vpa - fa_only) - (vpa_only - cntrl),
levels = c("fa_only", "cntrl", "fa_and_vpa", "vpa_only")
)
# fit the model/design matrix
vf.med.neg.eb <- vf.med.neg.data.for.sva %>%
lmFit(vf.med.neg.design.sv) %>%
contrasts.fit(vf.med.neg.cont.mat) %>%
eBayes()
# pull out the results for each metabolite for each comparison
vf.med.neg.eb.tidy <- tidy(vf.med.neg.eb) %>%
mutate(
adj_pval = p.adjust(p.value, method = "bonferroni"),
FC = 2 ^ estimate,
change_in_vpa = ifelse(FC < 1, "down", "up")
) %>%
rename(compound_short = gene)
# volcano plot
vf.med.neg.eb.tidy %>%
ggplot(aes(estimate, -log10(adj_pval))) +
geom_point(size = 2, alpha = 0.5) +
geom_hline(linetype = "dashed", color = "#009E73", yintercept = -log10(0.05)) +
geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1.2)) +
geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1/1.2)) +
xlab("log2(FC)") +
ylab("-log10(adjusted p-value)") +
ggtitle("Volcano plot\nVPA + FA / meds / Negative Mode")
# select statistically significant hits with a certain FC:
vf.med.neg.hits <- vf.med.neg.eb.tidy %>%
filter(adj_pval < 0.05 & (FC > 1.2 | FC < 1/1.2)) %>%
inner_join(vf.med.neg.compound.info, by = "compound_short")
# how many metabolites are significant across the different contrats
table(vf.med.neg.hits$term)
FA_diff vpa_highFA vpa_lowFA
0 1 2
# significant metabolites
sort(unique(vf.med.neg.hits$compound_full))
[1] "Caprylic Acid" "Taurine"
vf.med.neg.hits
# A tibble: 3 x 14
compound_short term estimate statistic p.value lod adj_pval FC
<chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 hVPA_FAnM14 vpa_~ 0.519 6.47 1.12e- 6 3.97 1.74e- 4 1.43
2 hVPA_FAnM21 vpa_~ 5.44 68.5 7.15e-29 56.3 1.12e-26 43.3
3 hVPA_FAnM21 vpa_~ 5.48 69.1 5.78e-29 56.5 9.02e-27 44.8
# ... with 6 more variables: change_in_vpa <chr>, compound_full <chr>,
# formula <chr>, mass <dbl>, rt <dbl>, cas_id <chr>
#write_csv(vf.med.neg.hits, path = "./results/vpa_fa_p5_hilic_media_neg_hits.csv")
vf.med.neg.gathered <- vf.med.neg.noNA %>%
filter(Group == "sample") %>%
bind_cols(inter = rep(1, 24)) %>%
select(Samples, VPA, FA, inter, starts_with("hVPA_FAnM")) %>%
gather(key = "Compound", value = "Abundance", hVPA_FAnM10:hVPA_FAnM9)
vf.med.neg.nested <- vf.med.neg.gathered %>%
group_by(Compound) %>%
nest() %>%
mutate(model = map(data, ~lm(Abundance ~ inter, data = .))) %>%
mutate(augment_model = map(model, augment))
vf.med.neg.modSV.resid <- vf.med.neg.nested %>%
unnest(data, augment_model) %>%
select(Samples, VPA, FA, Compound, .resid) %>%
spread(Compound, .resid)
# sample prep
vf.med.pos.smpl.data <- vf.med.pos.noNA %>%
filter(Group == "sample") %>%
unite("Treatment", VPA:FA, sep = "_")
vf.med.pos.data.for.sva <- as.matrix(
vf.med.pos.smpl.data[, which(
colnames(vf.med.pos.smpl.data) == "hVPA_FApM1"
):ncol(vf.med.pos.smpl.data)]
)
row.names(vf.med.pos.data.for.sva) <- vf.med.pos.smpl.data$Samples
vf.med.pos.data.for.sva <- t(vf.med.pos.data.for.sva)
vf.med.pos.data.pheno <- as.data.frame(vf.med.pos.smpl.data[, 5:6])
row.names(vf.med.pos.data.pheno) <- vf.med.pos.smpl.data$Samples
vf.med.pos.mod.vf <- model.matrix(~ 0 + as.factor(Treatment), data = vf.med.pos.data.pheno)
vf.med.pos.mod0 <- model.matrix(~ 1, data = vf.med.pos.data.pheno)
set.seed(2018)
num.sv(vf.med.pos.data.for.sva, vf.med.pos.mod.vf, method = "be")
[1] 0
set.seed(2018)
num.sv(vf.med.pos.data.for.sva, vf.med.pos.mod.vf, method = "leek")
[1] 0
set.seed(2018)
vf.med.pos.sv <- sva(vf.med.pos.data.for.sva, vf.med.pos.mod.vf, vf.med.pos.mod0)
No significant surrogate variables
colnames(vf.med.pos.mod.vf) <- c("fa_only", "cntrl", "fa_and_vpa", "vpa_only")
vf.med.pos.design.sv <- vf.med.pos.mod.vf
vf.med.pos.cont.mat <- makeContrasts(
vpa_lowFA = vpa_only - cntrl,
vpa_highFA = fa_and_vpa - fa_only,
FA_diff = (fa_and_vpa - fa_only) - (vpa_only - cntrl),
levels = c("fa_only", "cntrl", "fa_and_vpa", "vpa_only")
)
vf.med.pos.eb <- vf.med.pos.data.for.sva %>%
lmFit(vf.med.pos.design.sv) %>%
contrasts.fit(vf.med.pos.cont.mat) %>%
eBayes()
vf.med.pos.eb.tidy <- tidy(vf.med.pos.eb) %>%
mutate(
adj_pval = p.adjust(p.value, method = "bonferroni"),
FC = 2 ^ estimate,
change_in_vpa = ifelse(FC < 1, "down", "up")
) %>%
rename(compound_short = gene)
# volcano plot
vf.med.pos.eb.tidy %>%
ggplot(aes(estimate, -log10(adj_pval))) +
geom_point(size = 2, alpha = 0.5) +
geom_hline(linetype = "dashed", color = "#009E73", yintercept = -log10(0.05)) +
geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1.2)) +
geom_vline(linetype = "dashed", color = "#CC79A7", xintercept = log2(1/1.2)) +
xlab("log2(FC)") +
ylab("-log10(adjusted p-value)") +
ggtitle("Volcano plot\nVPA + FA / meds / posative Mode")
# select statistically significant hits with a certain FC:
vf.med.pos.hits <- vf.med.pos.eb.tidy %>%
filter(adj_pval < 0.05 & (FC > 1.2 | FC < 1/1.2)) %>%
inner_join(vf.med.pos.compound.info, by = "compound_short")
# how many metabolites are significant across the different contrats
table(vf.med.pos.hits$term)
FA_diff vpa_highFA vpa_lowFA
0 0 0
vf.med.pos.gathered <- vf.med.pos.noNA %>%
filter(Group == "sample") %>%
bind_cols(inter = rep(1, 24)) %>%
select(Samples, VPA, FA, inter, starts_with("hVPA_FApM")) %>%
gather(key = "Compound", value = "Abundance", hVPA_FApM1:hVPA_FApM9)
vf.med.pos.nested <- vf.med.pos.gathered %>%
group_by(Compound) %>%
nest() %>%
mutate(model = map(data, ~lm(Abundance ~ inter, data = .))) %>%
mutate(augment_model = map(model, augment))
vf.med.pos.modSV.resid <- vf.med.pos.nested %>%
unnest(data, augment_model) %>%
select(Samples, VPA, FA, Compound, .resid) %>%
spread(Compound, .resid)
### Cell Neg Mode ###
vf.cell.neg.resid.for.profile <- vf.cell.neg.modSV.resid %>%
select(Samples:FA, one_of(vf.cell.neg.hits$compound_short)) %>%
unite("Treatment", VPA:FA, sep = "_") %>%
gather("Compound", value = "Abundance", -c(Samples, Treatment))
vf.cell.neg.resid.for.profile %>%
group_by(Samples) %>%
count() %>%
filter(n != 23)
# A tibble: 0 x 2
# Groups: Samples [0]
# ... with 2 variables: Samples <chr>, n <int>
vf.cell.neg.resid.avgs <- vf.cell.neg.resid.for.profile %>%
group_by(Compound, Treatment) %>%
summarize(avg.abun = mean(Abundance)) %>%
ungroup()
vf.cell.neg.resid.order <- vf.cell.neg.resid.avgs %>%
separate(Treatment, into = c("VPA", "FA"), sep = "_") %>%
spread(key = "VPA", value = "avg.abun") %>%
mutate(FC = vpa - cntrl) %>%
group_by(Compound) %>%
summarize(avg_FC = mean(FC)) %>%
arrange(avg_FC) %>%
mutate(compound_sort = factor(Compound)) %>%
inner_join(
vf.cell.neg.hits %>%
select(compound_short, compound_full:cas_id) %>%
distinct(),
by = c("Compound" = "compound_short")
)
vf.cell.neg.modSV.resid.prof.plot <- vf.cell.neg.resid.for.profile %>%
mutate(compound_order = factor(Compound, levels = vf.cell.neg.resid.order$compound_sort)) %>%
ggplot(aes(compound_order, Abundance, color = Treatment, group = Samples)) +
geom_line(alpha = 0.3, size = 1.5) +
theme(
axis.ticks.x = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
) +
scale_x_discrete(name = "Compound", labels = vf.cell.neg.resid.order$compound_full) +
scale_color_manual(values = c("#56B4E9", "#0072B2", "#E69F00", "#D55E00"), labels = c("Control", "5X FA", "VPA", "VPA + 5X FA")) +
ylab("log2(Compound Resid)") +
# overlay the group averages
geom_line(
data = vf.cell.neg.resid.avgs,
aes(Compound, avg.abun, color = Treatment, group = Treatment),
size = 2
) +
theme(plot.margin = margin(5, 5, 5, 40))
vf.cell.neg.modSV.resid.prof.plot
### Cell Pos Mode ###
vf.cell.pos.resid.for.profile <- vf.cell.pos.modSV.resid %>%
select(Samples:FA, one_of(vf.cell.pos.hits$compound_short)) %>%
unite("Treatment", VPA:FA, sep = "_") %>%
gather("Compound", value = "Abundance", -c(Samples, Treatment))
vf.cell.pos.resid.for.profile %>%
group_by(Samples) %>%
count() %>%
filter(n != 27)
# A tibble: 0 x 2
# Groups: Samples [0]
# ... with 2 variables: Samples <chr>, n <int>
vf.cell.pos.resid.avgs <- vf.cell.pos.resid.for.profile %>%
group_by(Compound, Treatment) %>%
summarize(avg.abun = mean(Abundance)) %>%
ungroup()
vf.cell.pos.resid.order <- vf.cell.pos.resid.avgs %>%
separate(Treatment, into = c("VPA", "FA"), sep = "_") %>%
spread(key = "VPA", value = "avg.abun") %>%
mutate(FC = vpa - cntrl) %>%
group_by(Compound) %>%
summarize(avg_FC = mean(FC)) %>%
arrange(avg_FC) %>%
mutate(compound_sort = factor(Compound)) %>%
inner_join(
vf.cell.pos.hits %>%
select(compound_short, compound_full:cas_id) %>%
distinct(),
by = c("Compound" = "compound_short")
)
vf.cell.pos.modSV.resid.prof.plot <- vf.cell.pos.resid.for.profile %>%
mutate(compound_order = factor(Compound, levels = vf.cell.pos.resid.order$compound_sort)) %>%
ggplot(aes(compound_order, Abundance, color = Treatment, group = Samples)) +
geom_line(alpha = 0.3, size = 1.5) +
theme(
axis.ticks.x = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
) +
scale_x_discrete(name = "Compound", labels = vf.cell.pos.resid.order$compound_full) +
scale_color_manual(values = c("#56B4E9", "#0072B2", "#E69F00", "#D55E00"), labels = c("Control", "5X FA", "VPA", "VPA + 5X FA")) +
ylab("log2(Compound Resid)") +
# overlay the group averages
geom_line(
data = vf.cell.pos.resid.avgs,
aes(Compound, avg.abun, color = Treatment, group = Treatment),
size = 2
) +
theme(plot.margin = margin(5, 5, 5, 40))
vf.cell.pos.modSV.resid.prof.plot
vf.cell.resid.prof.grid.plot <- plot_grid(
vf.cell.neg.modSV.resid.prof.plot,
vf.cell.pos.modSV.resid.prof.plot,
labels = c("A", "B"),
ncol = 1
)
#save_plot("./results/vf_p5_cell_resid_profile_grid_plot.png", vf.cell.resid.prof.grid.plot, base_height = 10, base_width = 8)
vf.cell.neg.both.hits
## # A tibble: 8 x 14
## compound_short term estimate statistic p.value lod adj_pval FC
## <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 hVPA_FAnC115 vpa_~ 1.56 10.9 9.67e-10 12.5 3.83e-7 2.94
## 2 hVPA_FAnC115 vpa_~ 0.894 5.58 2.04e- 5 2.58 8.08e-3 1.86
## 3 hVPA_FAnC19 vpa_~ 1.41 10.8 1.10e- 9 12.3 4.36e-7 2.66
## 4 hVPA_FAnC19 vpa_~ 1.59 10.8 1.15e- 9 12.4 4.57e-7 3.00
## 5 hVPA_FAnC78 vpa_~ -1.42 -14.9 4.45e-12 17.9 1.76e-9 0.374
## 6 hVPA_FAnC78 vpa_~ -1.13 -10.5 1.75e- 9 12.0 6.91e-7 0.456
## 7 hVPA_FAnC90 vpa_~ 1.00 5.18 4.93e- 5 1.42 1.95e-2 2.00
## 8 hVPA_FAnC90 vpa_~ 1.17 5.40 3.04e- 5 2.18 1.20e-2 2.25
## # ... with 6 more variables: change_in_vpa <chr>, compound_full <chr>,
## # formula <chr>, mass <dbl>, rt <dbl>, cas_id <chr>
vf.cell.pos.both.hits
## # A tibble: 14 x 14
## compound_short term estimate statistic p.value lod adj_pval FC
## <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 hVPA_FApC105 vpa_~ -1.14 -7.34 5.30e- 7 5.91 2.26e- 4 0.453
## 2 hVPA_FApC105 vpa_~ -1.39 -7.86 1.94e- 7 7.19 8.27e- 5 0.381
## 3 hVPA_FApC107 vpa_~ -1.11 -7.32 5.50e- 7 5.87 2.34e- 4 0.463
## 4 hVPA_FApC107 vpa_~ -1.36 -7.87 1.89e- 7 7.21 8.06e- 5 0.390
## 5 hVPA_FApC127 vpa_~ 1.19 7.88 1.86e- 7 6.98 7.94e- 5 2.28
## 6 hVPA_FApC127 vpa_~ 0.880 5.14 5.56e- 5 1.46 2.37e- 2 1.84
## 7 hVPA_FApC18 vpa_~ 1.11 11.8 2.81e-10 13.6 1.20e- 7 2.16
## 8 hVPA_FApC18 vpa_~ 1.40 13.1 4.54e-11 15.6 1.94e- 8 2.64
## 9 hVPA_FApC69 vpa_~ -1.51 -18.3 1.11e-13 21.6 4.71e-11 0.351
## 10 hVPA_FApC69 vpa_~ -1.09 -11.7 3.45e-10 13.6 1.47e- 7 0.469
## 11 hVPA_FApC83 vpa_~ 0.614 5.59 2.04e- 5 2.18 8.70e- 3 1.53
## 12 hVPA_FApC83 vpa_~ 0.859 6.88 1.35e- 6 5.22 5.73e- 4 1.81
## 13 hVPA_FApC91 vpa_~ -1.20 -15.1 3.77e-12 18.0 1.60e- 9 0.434
## 14 hVPA_FApC91 vpa_~ -1.16 -12.8 6.54e-11 15.2 2.79e- 8 0.446
## # ... with 6 more variables: change_in_vpa <chr>, compound_full <chr>,
## # formula <chr>, mass <dbl>, rt <dbl>, cas_id <chr>
vf.cell.neg.lowFA.hits
## # A tibble: 9 x 14
## compound_short term estimate statistic p.value lod adj_pval FC
## <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 hVPA_FAnC127 vpa_~ 0.918 5.33 3.55e-5 2.02 0.0140 1.89
## 2 hVPA_FAnC128 vpa_~ 0.812 4.99 7.61e-5 1.26 0.0301 1.76
## 3 hVPA_FAnC133 vpa_~ 0.594 5.02 7.14e-5 1.32 0.0283 1.51
## 4 hVPA_FAnC134 vpa_~ 0.845 4.82 1.11e-4 0.879 0.0440 1.80
## 5 hVPA_FAnC18 vpa_~ 0.669 5.67 1.67e-5 2.78 0.00662 1.59
## 6 hVPA_FAnC39 vpa_~ 1.17 5.33 3.54e-5 2.02 0.0140 2.24
## 7 hVPA_FAnC50 vpa_~ 1.72 5.05 6.70e-5 1.39 0.0265 3.30
## 8 hVPA_FAnC65 vpa_~ -0.588 -5.28 4.01e-5 1.90 0.0159 0.665
## 9 hVPA_FAnC83 vpa_~ 0.723 5.68 1.64e-5 2.80 0.00650 1.65
## # ... with 6 more variables: change_in_vpa <chr>, compound_full <chr>,
## # formula <chr>, mass <dbl>, rt <dbl>, cas_id <chr>
vf.cell.neg.highFA.hits
## # A tibble: 10 x 14
## compound_short term estimate statistic p.value lod adj_pval FC
## <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 hVPA_FAnC10 vpa_~ 0.436 4.99 7.61e-5 0.981 0.0301 1.35
## 2 hVPA_FAnC103 vpa_~ 0.939 7.12 7.97e-7 5.61 0.000316 1.92
## 3 hVPA_FAnC11 vpa_~ 0.431 4.95 8.29e-5 0.895 0.0328 1.35
## 4 hVPA_FAnC43 vpa_~ -0.889 -4.92 9.00e-5 0.812 0.0356 0.540
## 5 hVPA_FAnC44 vpa_~ 0.485 4.98 7.76e-5 0.961 0.0307 1.40
## 6 hVPA_FAnC74 vpa_~ -0.544 -4.97 8.07e-5 0.922 0.0319 0.686
## 7 hVPA_FAnC77 vpa_~ -0.350 -4.87 9.94e-5 0.711 0.0394 0.785
## 8 hVPA_FAnC87 vpa_~ 0.394 5.07 6.42e-5 1.15 0.0254 1.31
## 9 hVPA_FAnC96 vpa_~ 0.914 6.55 2.53e-6 4.43 0.00100 1.88
## 10 hVPA_FAnC98 vpa_~ 1.93 6.41 3.45e-6 4.12 0.00137 3.81
## # ... with 6 more variables: change_in_vpa <chr>, compound_full <chr>,
## # formula <chr>, mass <dbl>, rt <dbl>, cas_id <chr>
vf.cell.pos.lowFA.hits
## # A tibble: 16 x 14
## compound_short term estimate statistic p.value lod adj_pval FC
## <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 hVPA_FApC102 vpa_~ 0.803 5.23 4.55e-5 1.66 0.0194 1.74
## 2 hVPA_FApC103 vpa_~ 0.453 5.05 6.74e-5 1.27 0.0287 1.37
## 3 hVPA_FApC113 vpa_~ -1.04 -5.16 5.35e-5 1.50 0.0228 0.486
## 4 hVPA_FApC114 vpa_~ -1.44 -5.06 6.70e-5 1.27 0.0285 0.369
## 5 hVPA_FApC117 vpa_~ -1.22 -5.36 3.41e-5 1.95 0.0145 0.428
## 6 hVPA_FApC23 vpa_~ 0.398 4.98 7.91e-5 1.11 0.0337 1.32
## 7 hVPA_FApC24 vpa_~ -0.824 -5.47 2.66e-5 2.21 0.0113 0.565
## 8 hVPA_FApC35 vpa_~ 0.828 5.92 1.00e-5 3.19 0.00427 1.78
## 9 hVPA_FApC39 vpa_~ -1.18 -5.97 8.95e-6 3.31 0.00381 0.443
## 10 hVPA_FApC59 vpa_~ -0.647 -4.82 1.15e-4 0.730 0.0491 0.639
## 11 hVPA_FApC81 vpa_~ -0.805 -5.04 6.98e-5 1.23 0.0297 0.572
## 12 hVPA_FApC82 vpa_~ -0.405 -5.64 1.81e-5 2.59 0.00773 0.755
## 13 hVPA_FApC90 vpa_~ -1.26 -5.34 3.56e-5 1.91 0.0152 0.419
## 14 hVPA_FApC96 vpa_~ -1.03 -5.74 1.48e-5 2.80 0.00631 0.491
## 15 hVPA_FApC97 vpa_~ -1.13 -5.18 5.08e-5 1.55 0.0216 0.456
## 16 hVPA_FApC98 vpa_~ -1.30 -4.90 9.60e-5 0.913 0.0409 0.406
## # ... with 6 more variables: change_in_vpa <chr>, compound_full <chr>,
## # formula <chr>, mass <dbl>, rt <dbl>, cas_id <chr>
vf.cell.pos.highFA.hits
## # A tibble: 4 x 14
## compound_short term estimate statistic p.value lod adj_pval FC
## <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 hVPA_FApC125 vpa_~ -0.504 -5.88 1.08e-5 2.82 0.00462 0.705
## 2 hVPA_FApC28 vpa_~ -0.580 -5.41 3.03e-5 1.78 0.0129 0.669
## 3 hVPA_FApC40 vpa_~ -0.423 -5.95 9.34e-6 2.97 0.00398 0.746
## 4 hVPA_FApC92 vpa_~ 0.481 5.41 3.02e-5 1.78 0.0129 1.40
## # ... with 6 more variables: change_in_vpa <chr>, compound_full <chr>,
## # formula <chr>, mass <dbl>, rt <dbl>, cas_id <chr>
vf.cell.neg.both.hits %>%
semi_join(vf.cell.pos.both.hits, by = "compound_full") %>%
nrow() / 2
## [1] 3
vf.cell.neg.both.hits %>%
semi_join(vf.cell.pos.highFA.hits, by = "compound_full")
## # A tibble: 0 x 14
## # ... with 14 variables: compound_short <chr>, term <fct>, estimate <dbl>,
## # statistic <dbl>, p.value <dbl>, lod <dbl>, adj_pval <dbl>, FC <dbl>,
## # change_in_vpa <chr>, compound_full <chr>, formula <chr>, mass <dbl>,
## # rt <dbl>, cas_id <chr>
vf.cell.neg.both.hits %>%
semi_join(vf.cell.pos.lowFA.hits, by = "compound_full") %>%
nrow() / 2
## [1] 0
vf.cell.neg.both.hits %>%
anti_join(vf.cell.pos.highFA.hits, by = "compound_full") %>%
anti_join(vf.cell.pos.lowFA.hits, by = "compound_full") %>%
anti_join(vf.cell.pos.both.hits, by = "compound_full") %>%
nrow() / 2
## [1] 1
vf.cell.neg.lowFA.hits %>%
semi_join(vf.cell.pos.lowFA.hits, by = "compound_full") %>%
nrow()
## [1] 0
vf.cell.neg.lowFA.hits %>%
semi_join(vf.cell.pos.highFA.hits, by = "compound_full") %>%
nrow()
## [1] 0
vf.cell.neg.lowFA.hits %>%
anti_join(vf.cell.pos.both.hits, by = "compound_full") %>%
anti_join(vf.cell.pos.lowFA.hits, by = "compound_full") %>%
anti_join(vf.cell.pos.highFA.hits, by = "compound_full") %>%
nrow()
## [1] 9
vf.cell.pos.both.hits %>%
semi_join(vf.cell.neg.highFA.hits, by = "compound_full")
## # A tibble: 0 x 14
## # ... with 14 variables: compound_short <chr>, term <fct>, estimate <dbl>,
## # statistic <dbl>, p.value <dbl>, lod <dbl>, adj_pval <dbl>, FC <dbl>,
## # change_in_vpa <chr>, compound_full <chr>, formula <chr>, mass <dbl>,
## # rt <dbl>, cas_id <chr>
vf.cell.pos.highFA.hits %>%
semi_join(vf.cell.neg.highFA.hits, by = "compound_full")
## # A tibble: 0 x 14
## # ... with 14 variables: compound_short <chr>, term <fct>, estimate <dbl>,
## # statistic <dbl>, p.value <dbl>, lod <dbl>, adj_pval <dbl>, FC <dbl>,
## # change_in_vpa <chr>, compound_full <chr>, formula <chr>, mass <dbl>,
## # rt <dbl>, cas_id <chr>
vf.cell.neg.highFA.hits %>%
anti_join(vf.cell.pos.both.hits, by = "compound_full") %>%
anti_join(vf.cell.pos.highFA.hits, by = "compound_full") %>%
anti_join(vf.cell.pos.lowFA.hits, by = "compound_full") %>%
nrow()
## [1] 10
vf.cell.pos.highFA.hits %>%
anti_join(vf.cell.neg.both.hits, by = "compound_full") %>%
anti_join(vf.cell.neg.highFA.hits, by = "compound_full") %>%
anti_join(vf.cell.neg.lowFA.hits, by = "compound_full") %>%
nrow()
## [1] 4
vf.cell.pos.lowFA.hits %>%
semi_join(vf.cell.neg.highFA.hits, by = "compound_full")
## # A tibble: 0 x 14
## # ... with 14 variables: compound_short <chr>, term <fct>, estimate <dbl>,
## # statistic <dbl>, p.value <dbl>, lod <dbl>, adj_pval <dbl>, FC <dbl>,
## # change_in_vpa <chr>, compound_full <chr>, formula <chr>, mass <dbl>,
## # rt <dbl>, cas_id <chr>
vf.cell.pos.both.hits %>%
anti_join(vf.cell.neg.highFA.hits, by = "compound_full") %>%
anti_join(vf.cell.neg.lowFA.hits, by = "compound_full") %>%
anti_join(vf.cell.neg.both.hits, by = "compound_full") %>%
nrow() / 2
## [1] 4
vf.cell.pos.lowFA.hits %>%
anti_join(vf.cell.neg.both.hits, by = "compound_full") %>%
anti_join(vf.cell.neg.lowFA.hits, by = "compound_full") %>%
anti_join(vf.cell.neg.highFA.hits, by = "compound_full") %>%
nrow()
## [1] 16
(not evaluated)
#write_csv(vf.cell.neg.modSV.resid, path = "./results/modsv_resid/vpa_fa_p5_cell_neg_modSV_resid.csv")
#write_csv(vf.cell.pos.modSV.resid, path = "./results/modsv_resid/vpa_fa_p5_cell_pos_modSV_resid.csv")
#write_csv(vf.med.neg.smpl.data, path = "./results/modsv_resid/vpa_fa_p5_media_neg_smpl_data.csv")
# write_csv(vf.med.pos.modSV.resid, path = "./results/modsv_resid/vpa_fa_p5_med_pos_modSV_resid.csv")
# write_csv(vf.med.neg.modSV.resid, path = "./results/modsv_resid/vpa_fa_p5_med_neg_modSV_resid.csv")
sessionInfo()
R version 3.5.3 (2019-03-11)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252
[2] LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggthemes_4.1.0 ggridges_0.5.1 biobroom_1.14.0
[4] broom_0.5.1 limma_3.38.3 sva_3.30.0
[7] BiocParallel_1.16.2 genefilter_1.64.0 mgcv_1.8-28
[10] nlme_3.1-137 heatmaply_0.15.2 viridis_0.5.1
[13] viridisLite_0.3.0 plotly_4.8.0 GGally_1.4.0
[16] cowplot_0.9.4 forcats_0.4.0 stringr_1.4.0
[19] dplyr_0.8.0.1 purrr_0.3.2 readr_1.3.1
[22] tidyr_0.8.3 tibble_2.1.1 ggplot2_3.1.0
[25] tidyverse_1.2.1
loaded via a namespace (and not attached):
[1] colorspace_1.4-1 class_7.3-15 modeltools_0.2-22
[4] mclust_5.4.3 rstudioapi_0.10 flexmix_2.3-15
[7] bit64_0.9-7 fansi_0.4.0 AnnotationDbi_1.44.0
[10] mvtnorm_1.0-10 lubridate_1.7.4 xml2_1.2.0
[13] codetools_0.2-16 splines_3.5.3 robustbase_0.93-4
[16] knitr_1.22 jsonlite_1.6 annotate_1.60.0
[19] cluster_2.0.7-1 kernlab_0.9-27 shiny_1.2.0
[22] compiler_3.5.3 httr_1.4.0 backports_1.1.3
[25] assertthat_0.2.1 Matrix_1.2-16 lazyeval_0.2.2
[28] cli_1.1.0 later_0.8.0 htmltools_0.3.6
[31] tools_3.5.3 gtable_0.2.0 glue_1.3.1
[34] reshape2_1.4.3 Rcpp_1.0.1 Biobase_2.42.0
[37] cellranger_1.1.0 trimcluster_0.1-2.1 gdata_2.18.0
[40] crosstalk_1.0.0 iterators_1.0.10 fpc_2.1-11.1
[43] xfun_0.5 rvest_0.3.2 mime_0.6
[46] gtools_3.8.1 XML_3.98-1.19 dendextend_1.10.0
[49] DEoptimR_1.0-8 MASS_7.3-51.1 scales_1.0.0
[52] TSP_1.1-6 promises_1.0.1 hms_0.4.2
[55] parallel_3.5.3 RColorBrewer_1.1-2 yaml_2.2.0
[58] memoise_1.1.0 gridExtra_2.3 reshape_0.8.8
[61] stringi_1.4.3 RSQLite_2.1.1 gclus_1.3.2
[64] S4Vectors_0.20.1 foreach_1.4.4 seriation_1.2-3
[67] caTools_1.17.1.2 BiocGenerics_0.28.0 matrixStats_0.54.0
[70] rlang_0.3.2 pkgconfig_2.0.2 prabclus_2.2-7
[73] bitops_1.0-6 evaluate_0.13 lattice_0.20-38
[76] labeling_0.3 htmlwidgets_1.3 bit_1.1-14
[79] tidyselect_0.2.5 plyr_1.8.4 magrittr_1.5
[82] R6_2.4.0 IRanges_2.16.0 gplots_3.0.1.1
[85] generics_0.0.2 DBI_1.0.0 pillar_1.3.1
[88] haven_2.1.0 whisker_0.3-2 withr_2.1.2
[91] survival_2.43-3 RCurl_1.95-4.12 nnet_7.3-12
[94] modelr_0.1.4 crayon_1.3.4 utf8_1.1.4
[97] KernSmooth_2.23-15 rmarkdown_1.12 grid_3.5.3
[100] readxl_1.3.1 data.table_1.12.0 blob_1.1.1
[103] digest_0.6.18 diptest_0.75-7 webshot_0.5.1
[106] xtable_1.8-3 httpuv_1.5.0 stats4_3.5.3
[109] munsell_0.5.0 registry_0.5-1